from selenium import webdriver
import time
import urllib
import urllib.request
import os
import datetime
# Collect article links from the Toutiao movie channel by repeatedly
# scrolling the page and harvesting the headline anchors that get loaded.

# Stop once the set holds more than this many distinct links.
TARGET_LINK_COUNT = 1000
# Seconds to wait after each scroll so newly requested items can render
# before the DOM is queried again.
SCROLL_PAUSE_SECONDS = 1
# Give up after this many consecutive scrolls that add no new link, so the
# script cannot spin forever if the feed stops growing.
MAX_IDLE_SCROLLS = 30
# Absolute XPath of the headline anchor inside each feed list item.
TITLE_LINK_XPATH = "/html/body/div/div[4]/div[2]/div[2]/div/div/div/ul//li/div/div[1]/div/div[1]/a"

driver = webdriver.Chrome()
url = "https://www.toutiao.com/ch/movie/"
# NOTE(review): `header` is not referenced by any active code in this script;
# it appears to be left over from the disabled image-download step.
header = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36",
    "Referer": "https://www.toutiao.com/ch/movie/",
    "Sec-Fetch-Mode":" no-cors"
}
titleset = set()

# NOTE: an earlier revision also read each item's thumbnail URL and publish
# time and saved the thumbnails into a per-day folder via
# urllib.request.urlretrieve; that code was disabled and is not reproduced.

try:
    driver.get(url)
    # Fixed wait for the initial page load before the first scroll.
    time.sleep(5)

    idle_scrolls = 0
    while len(titleset) <= TARGET_LINK_COUNT and idle_scrolls < MAX_IDLE_SCROLLS:
        driver.execute_script('window.scrollTo(0,100000)')
        time.sleep(SCROLL_PAUSE_SECONDS)

        count_before = len(titleset)
        # find_elements(by, value) is available in both Selenium 3 and 4,
        # unlike the find_elements_by_* helpers that Selenium 4 removed.
        for a in driver.find_elements("xpath", TITLE_LINK_XPATH):
            link = a.get_attribute("href")
            if link:  # skip anchors without an href (get_attribute gives None)
                titleset.add(link)
            if len(titleset) > TARGET_LINK_COUNT:
                break

        # Track consecutive scrolls that produced nothing new.
        if len(titleset) == count_before:
            idle_scrolls += 1
        else:
            idle_scrolls = 0

    print(titleset)
finally:
    # Always release the browser, including on errors and Ctrl-C.
    driver.quit()




